import requests
from fake_useragent import UserAgent
import pandas as pd
import time
import datetime
from sqlalchemy import create_engine

#engine = create_engine("mysql+pymysql://fecy:Jili@123@172.23.135.5:3306/scrapy")
engine = create_engine("mysql+pymysql://cl007:ChengLei-0711@127.0.0.1:3306/scrapy?charset=utf8")

headers={
        'User-agent':str(UserAgent(verify_ssl=False).random),
        'Cookie':'BAIDUID=14314D9D1182C0F3F18E4D32FB781398:FG=1; BIDUPSID=14314D9D1182C0F3F18E4D32FB781398; PSTM=1589471352; BDUSS=1dPaWxDaFI5akdiSlVpaUN5akR3TmZWRXhNM1V1eGF1V0J3OEhjaklCZ1NqZTFlRVFBQUFBJCQAAAAAABAAAAEAAACfVjf7YXNpbW92MDUxOAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABIAxl4SAMZeb; ZD_ENTRY=empty; bdindexid=4gqnrgiqkck0gcbrufvjkka5o2; RT="z=1&dm=baidu.com&si=avs4x0yocsr&ss=kapk89x0&sl=5&tt=3pk&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf"',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }

url = 'https://insight.baidu.com/base/search/rank/list?' #'搜索指数'
url2 ='https://insight.baidu.com/base/news/rank/general?' #咨询指数

def list_data_vehicle(url,startdate):
    parsed1 = {
        'pageSize': '3000',
        'dateType': startdate,
        'dimensionid': '1',  # 车系
    }
    print(url)
    req = requests.get(url, headers=headers, params=parsed1).json()
    req_results = req.get('data').get('results').get('current')
    return req_results

def list_data_brand(url,startdate):
    parsed1 = {
        # 每页显示的数量
        'pageSize': '200',
        'dateType': startdate,
        'dimensionid': '2',  # 品牌
    }
    req = requests.get(url, headers=headers, params=parsed1).json()
    req_results = req.get('data').get('results').get('current')
    return req_results

if __name__ == '__main__':
    begin = datetime.date(2018, 1, 1)
    end = datetime.date(2021, 12, 20)
    while begin < end :
        print(begin)
        datestart = begin.strftime('%Y%m%d')
        data1 = list_data_vehicle(url,datestart)
        print(data1)
        data2 = list_data_vehicle(url2,datestart)
        data3 = list_data_brand(url,datestart)
        data4 = list_data_brand(url2,datestart)
        data1 = pd.DataFrame(data1)
        data2 = pd.DataFrame(data2)
        data3 = pd.DataFrame(data3)
        data4 = pd.DataFrame(data4)
        data1['date'] = datestart
        data2['date'] = datestart
        data3['date'] = datestart
        data4['date'] = datestart
        data1.to_sql('baidu_search_vehicle', engine, if_exists='append', index=False)
        data2.to_sql('baidu_general_vehicle', engine, if_exists='append', index=False)
        data3.to_sql('baidu_search_brand', engine, if_exists='append', index=False)
        data4.to_sql('baidu_general_brand', engine, if_exists='append', index=False)
        begin = begin + datetime.timedelta(days=1)